library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(cluster)
library(viridis)
## Loading required package: viridisLite
library(leaflet)
library(mapview)
library(tmap)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(purrr)
# Load the saved county-level demographics table and preview its first rows.
demographics <- readr::read_rds("demographics.rds")
head(demographics)
## # A tibble: 6 x 22
## GEOID county state geometry medinc l_spanish l_russian
## <chr> <chr> <chr> <MULTIPOLYGON [°]> <dbl> <dbl> <dbl>
## 1 48081 Coke County Texas (((-100.825 31.74941, -1~ 25556 0.484 0
## 2 48273 Kleberg Coun~ Texas (((-97.3178 27.49456, -9~ 22844 0.491 0
## 3 48203 Harrison Cou~ Texas (((-94.70215 32.45618, -~ 26572 0.532 0.00367
## 4 48223 Hopkins Coun~ Texas (((-95.86333 33.04989, -~ 26608 0.696 0.00374
## 5 48033 Borden County Texas (((-101.6913 32.96184, -~ 35926 0.560 0
## 6 48419 Shelby County Texas (((-94.51143 31.97398, -~ 22658 0.903 0
## # ... with 15 more variables: l_korean <dbl>, l_chinese <dbl>,
## # l_vietnamese <dbl>, l_other_lang <dbl>, p_e_less <dbl>, p_foreign <dbl>,
## # hispanic <dbl>, white_alone <dbl>, black_alone <dbl>,
## # amercian_Indian_alone <dbl>, asian_alone <dbl>, pacific_alone <dbl>,
## # other_race <dbl>, two_or_more_race <dbl>, cluster <dbl>
# Fix the RNG state so any later random step in this script is reproducible.
set.seed(23994)

# Every column except the identifiers and the geometry is an analysis variable.
# `vars` is reused further down the script, so its contents are left unchanged.
vars <- demographics %>%
  select(-GEOID, -county, -state, -geometry) %>%
  colnames()

# The loaded table can already carry a `cluster` column from an earlier run.
# Keep that stale label out of the dissimilarities so it cannot influence the
# new assignment (and so re-running on a saved result gives the same answer).
cluster_features <- setdiff(vars, "cluster")

# NOTE(review): the features are not standardised, so `medinc` (dollars) is on
# a far larger scale than the proportion columns and likely dominates the
# dissimilarity; consider `stand = TRUE` if that is not intended.
distance_matrix <- cluster::daisy(demographics[, cluster_features])

# Partition the counties into five clusters around medoids and store each
# county's cluster id as a double column. Cluster ids hard-coded elsewhere in
# the script (e.g. `cluster == 4`) should be re-checked after re-running.
res.pam <- cluster::pam(x = distance_matrix, k = 5, cluster.only = FALSE)
demographics$cluster <- as.double(res.pam$clustering)
# Look up the cluster id assigned to King County, Washington.
demographics %>%
  filter(county == "King County", state == "Washington") %>%
  select(cluster)
## # A tibble: 1 x 1
## cluster
## <dbl>
## 1 4
# Keep the counties in cluster 4, the id the King County, Washington lookup
# reported in the recorded output. Update the literal if the clustering changes.
demographics_match <- demographics %>%
  filter(cluster == 4)

# Summarise every analysis variable for the matched counties. `all_of()` makes
# it explicit that `vars` is an external character vector of column names,
# which avoids the tidyselect ambiguity message.
demographics_match %>%
  select(all_of(vars)) %>%
  summary()
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(vars)` instead of `vars` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## medinc l_spanish l_russian l_korean
## Min. :34758 Min. :0.0000 Min. :0.00000 Min. :0.0000000
## 1st Qu.:36266 1st Qu.:0.5295 1st Qu.:0.01517 1st Qu.:0.0003992
## Median :38290 Median :0.6387 Median :0.04257 Median :0.0148234
## Mean :39826 Mean :0.6221 Mean :0.07254 Mean :0.0277206
## 3rd Qu.:41251 3rd Qu.:0.7251 3rd Qu.:0.09629 3rd Qu.:0.0368839
## Max. :70390 Max. :2.7113 Max. :0.50000 Max. :0.4549791
## l_chinese l_vietnamese l_other_lang p_e_less
## Min. :0.00000 Min. :0.000000 Min. : 0.00000 Min. :0.00000
## 1st Qu.:0.01203 1st Qu.:0.001317 1st Qu.: 0.01973 1st Qu.:0.01426
## Median :0.04386 Median :0.015026 Median : 0.04196 Median :0.03125
## Mean :0.08001 Mean :0.031270 Mean : 0.11733 Mean :0.04719
## 3rd Qu.:0.10086 3rd Qu.:0.040162 3rd Qu.: 0.09845 3rd Qu.:0.05821
## Max. :1.14146 Max. :0.259309 Max. :10.18090 Max. :0.24534
## p_foreign hispanic white_alone black_alone
## Min. :0.00000 Min. :0.003497 Min. :0.1659 Min. :0.00000
## 1st Qu.:0.03245 1st Qu.:0.032491 1st Qu.:0.7344 1st Qu.:0.01094
## Median :0.06664 Median :0.063184 Median :0.8650 Median :0.03521
## Mean :0.09353 Mean :0.097290 Mean :0.8135 Mean :0.06794
## 3rd Qu.:0.12274 3rd Qu.:0.130582 3rd Qu.:0.9281 3rd Qu.:0.09667
## Max. :0.43173 Max. :0.688263 Max. :0.9862 Max. :0.62672
## amercian_Indian_alone asian_alone pacific_alone other_race
## Min. :0.000000 Min. :0.00000 Min. :0.000e+00 Min. :0.00000
## 1st Qu.:0.001752 1st Qu.:0.00934 1st Qu.:2.048e-05 1st Qu.:0.00652
## Median :0.003019 Median :0.02552 Median :3.713e-04 Median :0.01404
## Mean :0.012330 Mean :0.04548 Mean :2.745e-03 Mean :0.02624
## 3rd Qu.:0.005844 3rd Qu.:0.05470 3rd Qu.:8.703e-04 3rd Qu.:0.03037
## Max. :0.512357 Max. :0.42657 Max. :2.727e-01 Max. :0.51620
## two_or_more_race cluster
## Min. :0.003073 Min. :4
## 1st Qu.:0.018742 1st Qu.:4
## Median :0.025988 Median :4
## Mean :0.031794 Mean :4
## 3rd Qu.:0.036374 3rd Qu.:4
## Max. :0.232220 Max. :4
Summary statistics for each cluster (the number of counties in each cluster would come from the commented-out `table()` call below)
# Build the table of analysis variables used for the per-cluster summaries.
# `all_of()` makes the external-vector selection explicit, and `union()`
# guarantees the `cluster` column needed by `split()` below is present even
# when `vars` does not list it.
data <- demographics %>%
  select(all_of(union(vars, "cluster")))

# Print a summary of every variable separately for each cluster id.
data %>%
  split(.$cluster) %>%
  map(summary)
## $`1`
## medinc l_spanish l_russian l_korean
## Min. :22978 Min. :0.0000 Min. :0.000000 Min. :0.000000
## 1st Qu.:23919 1st Qu.:0.4725 1st Qu.:0.000000 1st Qu.:0.000000
## Median :24736 Median :0.6296 Median :0.004501 Median :0.000000
## Mean :24625 Mean :0.6402 Mean :0.031919 Mean :0.013699
## 3rd Qu.:25364 3rd Qu.:0.7538 3rd Qu.:0.028705 3rd Qu.:0.009624
## Max. :26032 Max. :7.6667 Max. :1.445652 Max. :0.467391
## l_chinese l_vietnamese l_other_lang p_e_less
## Min. :0.000000 Min. :0.00000 Min. : 0.00000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.: 0.00000 1st Qu.:0.007435
## Median :0.003293 Median :0.00000 Median : 0.01269 Median :0.015863
## Mean :0.031482 Mean :0.01852 Mean : 0.10947 Mean :0.030518
## 3rd Qu.:0.024718 3rd Qu.:0.01240 3rd Qu.: 0.04829 3rd Qu.:0.034176
## Max. :1.205496 Max. :1.56757 Max. :11.44068 Max. :0.346853
## p_foreign hispanic white_alone black_alone
## Min. :0.00000 Min. :0.0002395 Min. :0.1088 Min. :0.000000
## 1st Qu.:0.01221 1st Qu.:0.0207827 1st Qu.:0.7526 1st Qu.:0.007527
## Median :0.02374 Median :0.0384550 Median :0.8899 Median :0.026496
## Mean :0.03907 Mean :0.0927959 Mean :0.8317 Mean :0.089777
## 3rd Qu.:0.04627 3rd Qu.:0.0967492 3rd Qu.:0.9470 3rd Qu.:0.120869
## Max. :0.53720 Max. :0.8256242 Max. :0.9971 Max. :0.726194
## amercian_Indian_alone asian_alone pacific_alone
## Min. :0.000000 Min. :0.000000 Min. :0.0000000
## 1st Qu.:0.001737 1st Qu.:0.002523 1st Qu.:0.0000000
## Median :0.003939 Median :0.005141 Median :0.0001928
## Mean :0.023894 Mean :0.008023 Mean :0.0007980
## 3rd Qu.:0.009953 3rd Qu.:0.009396 3rd Qu.:0.0008423
## Max. :0.839403 Max. :0.103458 Max. :0.0223422
## other_race two_or_more_race cluster
## Min. :0.000000 Min. :0.00000 Min. :1
## 1st Qu.:0.002973 1st Qu.:0.01388 1st Qu.:1
## Median :0.008294 Median :0.02035 Median :1
## Mean :0.021147 Mean :0.02464 Mean :1
## 3rd Qu.:0.022122 3rd Qu.:0.02939 3rd Qu.:1
## Max. :0.335762 Max. :0.17613 Max. :1
##
## $`2`
## medinc l_spanish l_russian l_korean
## Min. : 8641 Min. : 0.0000 Min. :0.00000 Min. :0.000000
## 1st Qu.:19688 1st Qu.: 0.4801 1st Qu.:0.00000 1st Qu.:0.000000
## Median :21188 Median : 0.6620 Median :0.00000 Median :0.000000
## Mean :20609 Mean : 0.7561 Mean :0.02554 Mean :0.015585
## 3rd Qu.:22104 3rd Qu.: 0.8315 3rd Qu.:0.01105 3rd Qu.:0.004766
## Max. :22957 Max. :22.2500 Max. :6.00000 Max. :1.054545
## l_chinese l_vietnamese l_other_lang p_e_less
## Min. :0.00000 Min. :0.000000 Min. : 0.00000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.: 0.00000 1st Qu.:0.005259
## Median :0.00000 Median :0.000000 Median : 0.00636 Median :0.012590
## Mean :0.03794 Mean :0.016851 Mean : 0.53830 Mean :0.032879
## 3rd Qu.:0.01479 3rd Qu.:0.007121 3rd Qu.: 0.04837 3rd Qu.:0.028669
## Max. :2.94118 Max. :1.385417 Max. :63.22222 Max. :0.536325
## p_foreign hispanic white_alone black_alone
## Min. :0.000000 Min. :0.0002701 Min. :0.0360 Min. :0.00000
## 1st Qu.:0.009438 1st Qu.:0.0161756 1st Qu.:0.5790 1st Qu.:0.00861
## Median :0.017783 Median :0.0292859 Median :0.8060 Median :0.05525
## Mean :0.033822 Mean :0.1031399 Mean :0.7342 Mean :0.18233
## 3rd Qu.:0.035351 3rd Qu.:0.0633609 3rd Qu.:0.9368 3rd Qu.:0.33606
## Max. :0.399427 Max. :0.9917444 Max. :0.9983 Max. :0.87226
## amercian_Indian_alone asian_alone pacific_alone
## Min. :0.000000 Min. :0.000000 Min. :0.0000000
## 1st Qu.:0.001062 1st Qu.:0.001175 1st Qu.:0.0000000
## Median :0.003038 Median :0.003842 Median :0.0000000
## Mean :0.037187 Mean :0.005893 Mean :0.0005395
## 3rd Qu.:0.009817 3rd Qu.:0.007362 3rd Qu.:0.0004996
## Max. :0.933240 Max. :0.084682 Max. :0.0156192
## other_race two_or_more_race cluster
## Min. :0.000000 Min. :0.000000 Min. :2
## 1st Qu.:0.001899 1st Qu.:0.009727 1st Qu.:2
## Median :0.006319 Median :0.016546 Median :2
## Mean :0.018075 Mean :0.021774 Mean :2
## 3rd Qu.:0.018914 3rd Qu.:0.025813 3rd Qu.:2
## Max. :0.344488 Max. :0.178688 Max. :2
##
## $`3`
## medinc l_spanish l_russian l_korean
## Min. :26045 Min. : 0.0000 Min. :0.00000 Min. :0.000000
## 1st Qu.:26686 1st Qu.: 0.4788 1st Qu.:0.00000 1st Qu.:0.000000
## Median :27345 Median : 0.6112 Median :0.01063 Median :0.001029
## Mean :27460 Mean : 0.6534 Mean :0.05664 Mean :0.020000
## 3rd Qu.:28206 3rd Qu.: 0.7421 3rd Qu.:0.04194 3rd Qu.:0.012964
## Max. :29248 Max. :10.5000 Max. :8.85714 Max. :5.000000
## l_chinese l_vietnamese l_other_lang p_e_less
## Min. :0.00000 Min. :0.0000000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.0000000 1st Qu.:0.00142 1st Qu.:0.008511
## Median :0.00870 Median :0.0000682 Median :0.01579 Median :0.018072
## Mean :0.04220 Mean :0.0157165 Mean :0.07318 Mean :0.032030
## 3rd Qu.:0.03253 3rd Qu.:0.0124274 3rd Qu.:0.04875 3rd Qu.:0.037927
## Max. :4.54545 Max. :0.4955752 Max. :6.21875 Max. :0.292529
## p_foreign hispanic white_alone black_alone
## Min. :0.00000 Min. :0.0009525 Min. :0.1507 Min. :0.000000
## 1st Qu.:0.01518 1st Qu.:0.0226126 1st Qu.:0.8251 1st Qu.:0.006388
## Median :0.02682 Median :0.0429029 Median :0.9092 Median :0.018415
## Mean :0.04401 Mean :0.0940776 Mean :0.8649 Mean :0.062433
## 3rd Qu.:0.05612 3rd Qu.:0.1029515 3rd Qu.:0.9526 3rd Qu.:0.069891
## Max. :0.30641 Max. :0.7283305 Max. :0.9954 Max. :0.595274
## amercian_Indian_alone asian_alone pacific_alone
## Min. :0.000000 Min. :0.000000 Min. :0.0000000
## 1st Qu.:0.001930 1st Qu.:0.003510 1st Qu.:0.0000000
## Median :0.004002 Median :0.006407 Median :0.0002159
## Mean :0.015338 Mean :0.010656 Mean :0.0008609
## 3rd Qu.:0.009756 3rd Qu.:0.012666 3rd Qu.:0.0007981
## Max. :0.782648 Max. :0.196750 Max. :0.0351223
## other_race two_or_more_race cluster
## Min. :0.000000 Min. :0.00000 Min. :3
## 1st Qu.:0.003378 1st Qu.:0.01477 1st Qu.:3
## Median :0.009378 Median :0.02081 Median :3
## Mean :0.021382 Mean :0.02447 Mean :3
## 3rd Qu.:0.021863 3rd Qu.:0.03047 3rd Qu.:3
## Max. :0.475813 Max. :0.17007 Max. :3
##
## $`4`
## medinc l_spanish l_russian l_korean
## Min. :34758 Min. :0.0000 Min. :0.00000 Min. :0.0000000
## 1st Qu.:36266 1st Qu.:0.5295 1st Qu.:0.01517 1st Qu.:0.0003992
## Median :38290 Median :0.6387 Median :0.04257 Median :0.0148234
## Mean :39826 Mean :0.6221 Mean :0.07254 Mean :0.0277206
## 3rd Qu.:41251 3rd Qu.:0.7251 3rd Qu.:0.09629 3rd Qu.:0.0368839
## Max. :70390 Max. :2.7113 Max. :0.50000 Max. :0.4549791
## l_chinese l_vietnamese l_other_lang p_e_less
## Min. :0.00000 Min. :0.000000 Min. : 0.00000 Min. :0.00000
## 1st Qu.:0.01203 1st Qu.:0.001317 1st Qu.: 0.01973 1st Qu.:0.01426
## Median :0.04386 Median :0.015026 Median : 0.04196 Median :0.03125
## Mean :0.08001 Mean :0.031270 Mean : 0.11733 Mean :0.04719
## 3rd Qu.:0.10086 3rd Qu.:0.040162 3rd Qu.: 0.09845 3rd Qu.:0.05821
## Max. :1.14146 Max. :0.259309 Max. :10.18090 Max. :0.24534
## p_foreign hispanic white_alone black_alone
## Min. :0.00000 Min. :0.003497 Min. :0.1659 Min. :0.00000
## 1st Qu.:0.03245 1st Qu.:0.032491 1st Qu.:0.7344 1st Qu.:0.01094
## Median :0.06664 Median :0.063184 Median :0.8650 Median :0.03521
## Mean :0.09353 Mean :0.097290 Mean :0.8135 Mean :0.06794
## 3rd Qu.:0.12274 3rd Qu.:0.130582 3rd Qu.:0.9281 3rd Qu.:0.09667
## Max. :0.43173 Max. :0.688263 Max. :0.9862 Max. :0.62672
## amercian_Indian_alone asian_alone pacific_alone other_race
## Min. :0.000000 Min. :0.00000 Min. :0.000e+00 Min. :0.00000
## 1st Qu.:0.001752 1st Qu.:0.00934 1st Qu.:2.048e-05 1st Qu.:0.00652
## Median :0.003019 Median :0.02552 Median :3.713e-04 Median :0.01404
## Mean :0.012330 Mean :0.04548 Mean :2.745e-03 Mean :0.02624
## 3rd Qu.:0.005844 3rd Qu.:0.05470 3rd Qu.:8.703e-04 3rd Qu.:0.03037
## Max. :0.512357 Max. :0.42657 Max. :2.727e-01 Max. :0.51620
## two_or_more_race cluster
## Min. :0.003073 Min. :4
## 1st Qu.:0.018742 1st Qu.:4
## Median :0.025988 Median :4
## Mean :0.031794 Mean :4
## 3rd Qu.:0.036374 3rd Qu.:4
## Max. :0.232220 Max. :4
##
## $`5`
## medinc l_spanish l_russian l_korean
## Min. :29284 Min. : 0.0000 Min. :0.000000 Min. :0.000000
## 1st Qu.:30369 1st Qu.: 0.5028 1st Qu.:0.002048 1st Qu.:0.000000
## Median :31209 Median : 0.6264 Median :0.017469 Median :0.003615
## Mean :31385 Mean : 0.6305 Mean :0.044419 Mean :0.013005
## 3rd Qu.:32224 3rd Qu.: 0.7351 3rd Qu.:0.048310 3rd Qu.:0.014192
## Max. :34738 Max. :16.0000 Max. :1.841727 Max. :0.536585
## l_chinese l_vietnamese l_other_lang p_e_less
## Min. :0.00000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.003471 1st Qu.:0.009475
## Median :0.01028 Median :0.002494 Median :0.019176 Median :0.019689
## Mean :0.03488 Mean :0.019451 Mean :0.058934 Mean :0.033994
## 3rd Qu.:0.03889 3rd Qu.:0.021348 3rd Qu.:0.056281 3rd Qu.:0.041931
## Max. :0.67234 Max. :1.000000 Max. :1.551724 Max. :0.322680
## p_foreign hispanic white_alone black_alone
## Min. :0.00000 Min. :0.0003914 Min. :0.3046 Min. :0.000000
## 1st Qu.:0.01752 1st Qu.:0.0267769 1st Qu.:0.8321 1st Qu.:0.005879
## Median :0.03314 Median :0.0487612 Median :0.9229 Median :0.013723
## Mean :0.05219 Mean :0.0885606 Mean :0.8757 Mean :0.052872
## 3rd Qu.:0.06479 3rd Qu.:0.0995825 3rd Qu.:0.9582 3rd Qu.:0.057868
## Max. :0.47238 Max. :0.6645812 Max. :0.9942 Max. :0.623532
## amercian_Indian_alone asian_alone pacific_alone other_race
## Min. :0.000000 Min. :0.000000 Min. :0.0000000 Min. :0.00000
## 1st Qu.:0.001863 1st Qu.:0.004097 1st Qu.:0.0000000 1st Qu.:0.00394
## Median :0.003553 Median :0.008007 Median :0.0002134 Median :0.01018
## Mean :0.009915 Mean :0.015828 Mean :0.0010476 Mean :0.02078
## 3rd Qu.:0.007727 3rd Qu.:0.016925 3rd Qu.:0.0007089 3rd Qu.:0.02183
## Max. :0.386749 Max. :0.330198 Max. :0.1220201 Max. :0.55425
## two_or_more_race cluster
## Min. :0.00000 Min. :5
## 1st Qu.:0.01403 1st Qu.:5
## Median :0.02085 Median :5
## Mean :0.02389 Mean :5
## 3rd Qu.:0.02971 3rd Qu.:5
## Max. :0.28886 Max. :5
#table(demographics$cluster, exclude = 'nothing')
# Map the cluster-4 counties: the filtered table is converted to an sf object
# and handed to mapview, coloured by the `cluster` column.
demographics %>%
  filter(cluster == 4) %>%
  st_sf() %>%
  mapview(zcol = "cluster")
# Randomly draw 40 of the matched counties and list which ones were picked.
demographics_sample <- slice_sample(demographics_match, n = 40)
select(demographics_sample, county, state)
## # A tibble: 40 x 2
## county state
## <chr> <chr>
## 1 Juneau City and Borough Alaska
## 2 Cumberland County Pennsylvania
## 3 District of Columbia District of Columbia
## 4 Grundy County Illinois
## 5 Worcester County Massachusetts
## 6 Clarke County Virginia
## 7 El Dorado County California
## 8 Cobb County Georgia
## 9 Manassas Park city Virginia
## 10 Lander County Nevada
## # ... with 30 more rows